# Inspect the saved vocabulary and merge rules
import pickle
from itertools import islice

# Load the pickled vocabulary, report its size, then scan it once and print
# every entry whose value is strictly longer than all values seen before it.
# The last line printed is therefore the first entry holding the longest value.
print("Loading vocab...")
vocab_pth = r"C:\Users\cwj\Desktop\cs336\a1\assignment1-basics-main\cs336\cs336_basics\out\ts-train-vocab.txt"
# NOTE: despite the .txt suffix this file is read as a pickle. Unpickling can
# execute arbitrary code, so only point this at files you generated yourself.
with open(vocab_pth, "rb") as f:
    vocab = pickle.load(f)

# The handle is only needed for deserialisation, so the scan runs after the
# file has been closed.
print(f"Loaded {len(vocab)} tokens")

# Running maximum of len(value); only new record holders are printed.
max_len: int = 0
for key, value in vocab.items():
    if len(value) > max_len:
        max_len = len(value)
        print(f"{key}: {value}")


# Load the pickled merges, then report how many there are and show the
# leading five entries as a quick preview.
print("Loading merges...")
merges_pth = r"C:\Users\cwj\Desktop\cs336\a1\assignment1-basics-main\cs336\cs336_basics\out\ts-train-merges.txt"
with open(merges_pth, "rb") as f:
    merges = pickle.load(f)

# Reporting only needs the in-memory object, not the file handle.
print(f"Loaded {len(merges)} merges")
preview = merges[:5]  # the loaded object must support slicing
print(preview)
